In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import TimeSeriesSplit
import statsmodels
from statsmodels.tsa.seasonal import seasonal_decompose, STL
import statsmodels.tsa.api as smt
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.stattools import pacf, acf
from statsmodels.stats.stattools import durbin_watson

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

import matplotlib.pyplot as plt
import seaborn as sns

from utils import *

%matplotlib inline
In [2]:
data = pd.read_csv('vodafone_data.csv')
In [3]:
data.head()
Out[3]:
Date Open High Low Close Volume
0 2016-02-11 29.330000 29.510000 29.030001 29.270000 4805000
1 2016-02-12 29.520000 29.950001 29.410000 29.900000 3116500
2 2016-02-16 29.950001 30.420000 29.900000 30.260000 4037900
3 2016-02-17 31.000000 31.320000 30.900000 31.090000 4313800
4 2016-02-18 31.090000 31.150000 30.920000 30.959999 3522800

Експлоративний аналіз даних

In [4]:
data['Date'] = pd.to_datetime(data['Date']).dt.date
data['Date_index'] = pd.DatetimeIndex(data.Date)
In [5]:
data['year'] = pd.to_datetime(data['Date']).dt.year
data['quarter'] = pd.to_datetime(data['Date']).dt.quarter
data['month'] = pd.to_datetime(data['Date']).dt.month
In [6]:
data.Date.agg({min, max})
Out[6]:
min    2016-02-11
max    2021-02-11
Name: Date, dtype: object
In [7]:
data.shape
Out[7]:
(1260, 10)

Розподіл значень ряду по роках

Розподіл кількості значень ряду по роках

In [8]:
data.year.value_counts().plot(kind='bar', rot='45');
plt.title('Qty of observations for each year');
In [9]:
data.year.value_counts(normalize=True).plot(kind='bar', rot='45');
plt.title('Percentage of observations for each year');
Бачимо, що значення часового ряду розподілені практично рівномірно по роках

Розподіл значень ряду

In [10]:
sns.distplot(data.Close);
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [11]:
fig, axs = plt.subplots(nrows=data.year.nunique(), ncols=1, figsize=(9, 35))

for idx, year in enumerate(data.year.unique()):
    sns.distplot(data.loc[data.year == year, 'Close'], ax=axs[idx]);
    axs[idx].set_title('Distplot for {}'.format(year));
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
Розподіл значень по роках дає певні "натяки" на наявність негативного тренду

Аналіз пропущених значень

In [12]:
missing_dates = pd.date_range(start='2016-02-11', end='2021-02-11', freq='D').difference(data.Date)
In [13]:
# we have 568 missing dates
missing_dates.nunique()
Out[13]:
568
In [14]:
missing_dates
Out[14]:
DatetimeIndex(['2016-02-13', '2016-02-14', '2016-02-15', '2016-02-20',
               '2016-02-21', '2016-02-27', '2016-02-28', '2016-03-05',
               '2016-03-06', '2016-03-12',
               ...
               '2021-01-10', '2021-01-16', '2021-01-17', '2021-01-18',
               '2021-01-23', '2021-01-24', '2021-01-30', '2021-01-31',
               '2021-02-06', '2021-02-07'],
              dtype='datetime64[ns]', length=568, freq=None)
In [15]:
missing_dates = pd.DataFrame(missing_dates, columns=['date'])
missing_dates['year'] = missing_dates['date'].dt.year
In [16]:
missing_dates.groupby('year').date.count().plot(kind='bar', rot='45');
plt.title('Qty missing values for each year');
In [17]:
missing_dates.year.value_counts(normalize=True).plot(kind='bar', rot='45');
plt.title('Percentage of missing values for each year');
In [18]:
missing_dates['month'] = missing_dates['date'].dt.month
missing_dates['day'] = missing_dates['date'].dt.day
In [19]:
missing_dates.head()
Out[19]:
date year month day
0 2016-02-13 2016 2 13
1 2016-02-14 2016 2 14
2 2016-02-15 2016 2 15
3 2016-02-20 2016 2 20
4 2016-02-21 2016 2 21
In [20]:
missing_dates.day.value_counts(normalize=True).head()
Out[20]:
25    0.040493
4     0.038732
1     0.036972
2     0.035211
3     0.035211
Name: day, dtype: float64
In [21]:
missing_dates.month.value_counts(normalize=True).head()
Out[21]:
1     0.093310
12    0.091549
7     0.088028
9     0.086268
5     0.084507
Name: month, dtype: float64

Лінійна інтерполяція для обробки пропущених значень

In [22]:
data.head()
Out[22]:
Date Open High Low Close Volume Date_index year quarter month
0 2016-02-11 29.330000 29.510000 29.030001 29.270000 4805000 2016-02-11 2016 1 2
1 2016-02-12 29.520000 29.950001 29.410000 29.900000 3116500 2016-02-12 2016 1 2
2 2016-02-16 29.950001 30.420000 29.900000 30.260000 4037900 2016-02-16 2016 1 2
3 2016-02-17 31.000000 31.320000 30.900000 31.090000 4313800 2016-02-17 2016 1 2
4 2016-02-18 31.090000 31.150000 30.920000 30.959999 3522800 2016-02-18 2016 1 2
In [23]:
data_no_missing = data.copy(deep=True)
In [24]:
data_no_missing.head()
Out[24]:
Date Open High Low Close Volume Date_index year quarter month
0 2016-02-11 29.330000 29.510000 29.030001 29.270000 4805000 2016-02-11 2016 1 2
1 2016-02-12 29.520000 29.950001 29.410000 29.900000 3116500 2016-02-12 2016 1 2
2 2016-02-16 29.950001 30.420000 29.900000 30.260000 4037900 2016-02-16 2016 1 2
3 2016-02-17 31.000000 31.320000 30.900000 31.090000 4313800 2016-02-17 2016 1 2
4 2016-02-18 31.090000 31.150000 30.920000 30.959999 3522800 2016-02-18 2016 1 2
In [25]:
data_no_missing = data_no_missing.set_index('Date_index').asfreq('D')
In [26]:
data_no_missing.head()
Out[26]:
Date Open High Low Close Volume year quarter month
Date_index
2016-02-11 2016-02-11 29.33 29.510000 29.030001 29.27 4805000.0 2016.0 1.0 2.0
2016-02-12 2016-02-12 29.52 29.950001 29.410000 29.90 3116500.0 2016.0 1.0 2.0
2016-02-13 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2016-02-14 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2016-02-15 NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [27]:
for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
    data_no_missing[col] = data_no_missing[col].interpolate()
In [28]:
data_no_missing.head()
Out[28]:
Date Open High Low Close Volume year quarter month
Date_index
2016-02-11 2016-02-11 29.330000 29.510000 29.030001 29.27 4805000.0 2016.0 1.0 2.0
2016-02-12 2016-02-12 29.520000 29.950001 29.410000 29.90 3116500.0 2016.0 1.0 2.0
2016-02-13 NaN 29.627500 30.067501 29.532500 29.99 3346850.0 NaN NaN NaN
2016-02-14 NaN 29.735000 30.185001 29.655000 30.08 3577200.0 NaN NaN NaN
2016-02-15 NaN 29.842501 30.302500 29.777500 30.17 3807550.0 NaN NaN NaN
In [29]:
data_no_missing = data_no_missing.reset_index(drop=False)
In [30]:
data_no_missing.head(3)
Out[30]:
Date_index Date Open High Low Close Volume year quarter month
0 2016-02-11 2016-02-11 29.3300 29.510000 29.030001 29.27 4805000.0 2016.0 1.0 2.0
1 2016-02-12 2016-02-12 29.5200 29.950001 29.410000 29.90 3116500.0 2016.0 1.0 2.0
2 2016-02-13 NaN 29.6275 30.067501 29.532500 29.99 3346850.0 NaN NaN NaN
In [31]:
data_no_missing['Date'] = pd.to_datetime(data_no_missing['Date_index']).dt.date
data_no_missing['year'] = pd.to_datetime(data_no_missing['Date']).dt.year
data_no_missing['quarter'] = pd.to_datetime(data_no_missing['Date']).dt.quarter
data_no_missing['month'] = pd.to_datetime(data_no_missing['Date']).dt.month
In [32]:
data_no_missing.head()
Out[32]:
Date_index Date Open High Low Close Volume year quarter month
0 2016-02-11 2016-02-11 29.330000 29.510000 29.030001 29.27 4805000.0 2016 1 2
1 2016-02-12 2016-02-12 29.520000 29.950001 29.410000 29.90 3116500.0 2016 1 2
2 2016-02-13 2016-02-13 29.627500 30.067501 29.532500 29.99 3346850.0 2016 1 2
3 2016-02-14 2016-02-14 29.735000 30.185001 29.655000 30.08 3577200.0 2016 1 2
4 2016-02-15 2016-02-15 29.842501 30.302500 29.777500 30.17 3807550.0 2016 1 2

Аналіз наявності викидів в даних

In [33]:
fig = plt.figure(figsize=(10, 8))
sns.boxplot(data=data_no_missing, x=data_no_missing.year, y='Close');
plt.title('Box-Plots for each year');

Бачимо, що часовий ряд є досить "чистим", також проглядається негативний тренд

Аналіз наявності тренду, сезонності, циклічності

Візуалізація часового ряду

In [34]:
data_no_missing.set_index('Date').Close.plot(title='Closing price time series', figsize=(10, 7));

Візуалізація значень часового ряду у розбитті по рокам

In [35]:
fig, axs = plt.subplots(nrows=data.year.nunique(), ncols=1, figsize=(10, 50))

for idx, year in enumerate(data.year.unique()):
    
    data_no_missing[data_no_missing.year == year].set_index('Date').Close.plot(
        title='Closing price time series for {}'.format(year), ax=axs[idx], rot='45');

Поквартальна візуалізація значень часового ряду

In [36]:
data_no_missing.set_index('Date_index').Close.resample('Q').mean().plot(
        title='Quarterely Closing price time series', rot='45');
In [37]:
fig, axs = plt.subplots(nrows=1, ncols=data.year.nunique(), figsize=(30, 10))

for idx, year in enumerate(data.year.unique()):
    
    data_no_missing[data_no_missing.year == year].set_index('Date_index').Close.resample('Q').mean().plot(
        title='Quarterely Closing price time series for {}'.format(year), ax=axs[idx], rot='45');
/usr/local/lib/python3.6/dist-packages/pandas/plotting/_matplotlib/core.py:1095: UserWarning: Attempting to set identical left == right == 204.0 results in singular transformations; automatically expanding.
  ax.set_xlim(left, right)

Помісячна візуалізація значень часового ряду

In [38]:
data_no_missing.set_index('Date_index').Close.resample('M').mean().plot(
        title='Quarterely Closing price time series', rot='45');
In [39]:
fig, axs = plt.subplots(nrows=1, ncols=data.year.nunique(), figsize=(30, 10))

for idx, year in enumerate(data.year.unique()):
    
    data_no_missing[data_no_missing.year == year].set_index('Date_index').Close.resample('M').mean().plot(
        title='Quarterely Closing price time series for {}'.format(year), ax=axs[idx], rot='45');
Наведені графіки свідчать про наявність так званого "mean-revesring" тренду

Перевірка на стаціонарність

Тест Дікі-Фулера

In [40]:
stationarity_test(data_no_missing.Close, smt.adfuller)
Out[40]:
{'adfuller_statistics': -1.2810249204236723,
 'adfuller_p_value': 0.63780811113391,
 'adfuller_critical_values': {'1%': -3.4339382310452033,
  '5%': -2.863125003847544,
  '10%': -2.56761380228936}}

KPSS тест

In [41]:
stationarity_test(data_no_missing.Close, smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1882: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  warn_msg.format(direction="smaller"), InterpolationWarning
Out[41]:
{'kpss_statistics': 5.98948825089506,
 'kpss_p_value': 0.01,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Бачимо, що вихідний часовий ряд не є стаціонарним

Декомпозиція часового ряду

In [42]:
data_no_missing.head(1)
Out[42]:
Date_index Date Open High Low Close Volume year quarter month
0 2016-02-11 2016-02-11 29.33 29.51 29.030001 29.27 4805000.0 2016 1 2
In [43]:
data_no_missing = data_no_missing.set_index('Date_index')

Additive model

period=5
In [44]:
sesonal_decomposition_additive_5 = plot_decomposition(data_no_missing.Close, 'additive', 5)
Тест на стаціонарність для залишків
In [45]:
stationarity_test(sesonal_decomposition_additive_5.resid.dropna(), smt.adfuller)
Out[45]:
{'adfuller_statistics': -15.827107274901543,
 'adfuller_p_value': 1.0043291380950057e-28,
 'adfuller_critical_values': {'1%': -3.4339840952648695,
  '5%': -2.8631452508003057,
  '10%': -2.567624583142913}}
In [46]:
stationarity_test(sesonal_decomposition_additive_5.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[46]:
{'kpss_statistics': 0.011048183295121446,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Additive model

period=20
In [47]:
sesonal_decomposition_additive_20 = plot_decomposition(data_no_missing.Close, 'additive', 20)
Тест на стаціонарність для залишків
In [48]:
stationarity_test(sesonal_decomposition_additive_20.resid.dropna(), smt.adfuller)
Out[48]:
{'adfuller_statistics': -12.927870360509402,
 'adfuller_p_value': 3.754426786446774e-24,
 'adfuller_critical_values': {'1%': -3.434022876868246,
  '5%': -2.8631623708604996,
  '10%': -2.5676336990659903}}
In [49]:
stationarity_test(sesonal_decomposition_additive_20.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[49]:
{'kpss_statistics': 0.012725962247832222,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Additive model

period=253
In [50]:
sesonal_decomposition_additive_253 = plot_decomposition(data_no_missing.Close, 'additive', 253)
Тест на стаціонарність для залишків
In [51]:
stationarity_test(sesonal_decomposition_additive_253.resid.dropna(), smt.adfuller)
Out[51]:
{'adfuller_statistics': -4.317859037108565,
 'adfuller_p_value': 0.000413621436312785,
 'adfuller_critical_values': {'1%': -3.4345140199818918,
  '5%': -2.8633791658322503,
  '10%': -2.567749139071643}}
In [52]:
stationarity_test(sesonal_decomposition_additive_253.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[52]:
{'kpss_statistics': 0.09266132310912872,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Multiplicative model

period=5
In [53]:
sesonal_decomposition_multiplicative_5 = plot_decomposition(data_no_missing.Close, 'multiplicative', 5)
Тест на стаціонарність для залишків
In [54]:
stationarity_test(sesonal_decomposition_multiplicative_5.resid.dropna(), smt.adfuller)
Out[54]:
{'adfuller_statistics': -15.133115752250903,
 'adfuller_p_value': 7.184679279260353e-28,
 'adfuller_critical_values': {'1%': -3.4339840952648695,
  '5%': -2.8631452508003057,
  '10%': -2.567624583142913}}
In [55]:
stationarity_test(sesonal_decomposition_multiplicative_5.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[55]:
{'kpss_statistics': 0.032780619982659895,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Multiplicative model

period=20
In [56]:
sesonal_decomposition_multiplicative_20 = plot_decomposition(data_no_missing.Close, 'multiplicative', 20)
Тест на стаціонарність для залишків
In [57]:
stationarity_test(sesonal_decomposition_multiplicative_20.resid.dropna(), smt.adfuller)
Out[57]:
{'adfuller_statistics': -12.969482066859554,
 'adfuller_p_value': 3.102846019516205e-24,
 'adfuller_critical_values': {'1%': -3.4340249409468155,
  '5%': -2.8631632820375903,
  '10%': -2.5676341842417187}}
In [58]:
stationarity_test(sesonal_decomposition_multiplicative_20.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[58]:
{'kpss_statistics': 0.029767313939175324,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Multiplicative model

period=253
In [59]:
sesonal_decomposition_multiplicative_253 = plot_decomposition(data_no_missing.Close, 'multiplicative', 253)
Тест на стаціонарність для залишків
In [60]:
stationarity_test(sesonal_decomposition_multiplicative_253.resid.dropna(), smt.adfuller)
Out[60]:
{'adfuller_statistics': -4.185755600115316,
 'adfuller_p_value': 0.00069645297351933,
 'adfuller_critical_values': {'1%': -3.4345487774977768,
  '5%': -2.8633945067663817,
  '10%': -2.5677573081032214}}
In [61]:
stationarity_test(sesonal_decomposition_multiplicative_253.resid.dropna(), smt.kpss)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
Out[61]:
{'kpss_statistics': 0.09682880596583154,
 'kpss_p_value': 0.1,
 'kpss_critical_values': {'10%': 0.347,
  '5%': 0.463,
  '2.5%': 0.574,
  '1%': 0.739}}

Побудова моделей за допомогою крос-валідації

In [62]:
data_no_missing = data_no_missing.reset_index(drop=False)
In [73]:
all_metrics_df, all_metrics_df_test = make_cross_validation(data_no_missing, n_splits=7, test_size=100)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
RMSE:     0.3626
RSS:   148.3287
MSE:     0.1315
MAE:     0.2241
MAPE:     0.0084
R2:     0.9925
RMSE:     0.1997
RSS:     3.9890
MSE:     0.0399
MAE:     0.1362
MAPE:     0.0079
R2:     0.9717
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
RMSE:     0.3423
RSS:   132.1507
MSE:     0.1172
MAE:     0.2098
MAPE:     0.0084
R2:     0.9946
RMSE:     0.2662
RSS:     7.0841
MSE:     0.0708
MAE:     0.1427
MAPE:     0.0080
R2:     0.9620
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/base/model.py:548: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available
  'available', HessianInversionWarning)
RMSE:     0.3336
RSS:   125.5544
MSE:     0.1113
MAE:     0.2051
MAPE:     0.0086
R2:     0.9951
RMSE:     0.1869
RSS:     3.4933
MSE:     0.0349
MAE:     0.1207
MAPE:     0.0060
R2:     0.9062
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/base/model.py:548: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available
  'available', HessianInversionWarning)
RMSE:     0.3226
RSS:   117.3756
MSE:     0.1041
MAE:     0.1963
MAPE:     0.0085
R2:     0.9955
RMSE:     0.4523
RSS:    20.4581
MSE:     0.2046
MAE:     0.2999
MAPE:     0.0194
R2:     0.9761
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/base/model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  ConvergenceWarning)
RMSE:     0.3739
RSS:   157.7194
MSE:     0.1398
MAE:     0.2201
MAPE:     0.0104
R2:     0.9948
RMSE:     0.3099
RSS:     9.6038
MSE:     0.0960
MAE:     0.1959
MAPE:     0.0127
R2:     0.9282
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
RMSE:     0.3871
RSS:   169.0189
MSE:     0.1498
MAE:     0.2266
MAPE:     0.0113
R2:     0.9949
RMSE:     0.1750
RSS:     3.0633
MSE:     0.0306
MAE:     0.1308
MAPE:     0.0090
R2:     0.9455
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1850: FutureWarning: The behavior of using nlags=None will change in release 0.13.Currently nlags=None is the same as nlags="legacy", and so a sample-size lag length is used. After the next release, the default will change to be the same as nlags="auto" which uses an automatic lag length selection method. To silence this warning, either use "auto" or "legacy"
  warnings.warn(msg, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1886: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

  warn_msg.format(direction="greater"), InterpolationWarning
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
<Figure size 720x576 with 0 Axes>
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py:472: FutureWarning: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

To silence this warning and continue using ARMA and ARIMA until they are
removed, use:

import warnings
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

  warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
/home/klimchuk/.local/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:579: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
  ' ignored when e.g. forecasting.', ValueWarning)
RMSE:     0.3850
RSS:   167.2038
MSE:     0.1482
MAE:     0.2271
MAPE:     0.0118
R2:     0.9948
RMSE:     0.2551
RSS:     6.5094
MSE:     0.0651
MAE:     0.1680
MAPE:     0.0099
R2:     0.9304
In [74]:
all_metrics_df
Out[74]:
RMSE RSS MSE MAE MAPE R2 durbin_watson
Cross-val iteration
0 0.362625 148.328692 0.131497 0.224067 0.008406 0.992470 1.952028
1 0.342279 132.150653 0.117155 0.209835 0.008353 0.994623 1.937324
2 0.333627 125.554401 0.111307 0.205088 0.008634 0.995129 1.881437
3 0.322578 117.375577 0.104056 0.196313 0.008517 0.995490 1.889807
4 0.373928 157.719378 0.139822 0.220100 0.010356 0.994845 1.967694
5 0.387091 169.018914 0.149839 0.226574 0.011271 0.994882 1.997653
6 0.385007 167.203788 0.148230 0.227127 0.011757 0.994809 1.997512
In [75]:
all_metrics_df_test
Out[75]:
RMSE RSS MSE MAE MAPE R2 durbin_watson
Cross-val iteration
0 0.199725 3.989019 0.039890 0.136175 0.007870 0.971710 1.952028
1 0.266160 7.084140 0.070841 0.142656 0.007994 0.961977 1.937324
2 0.186904 3.493324 0.034933 0.120744 0.006046 0.906160 1.881437
3 0.452306 20.458108 0.204581 0.299875 0.019375 0.976064 1.889807
4 0.309901 9.603844 0.096038 0.195896 0.012715 0.928230 1.967694
5 0.175023 3.063294 0.030633 0.130778 0.009028 0.945540 1.997653
6 0.255136 6.509438 0.065094 0.167960 0.009936 0.930437 1.997512

Розподіл метрик по ітераціям

Тренувальні вибірки

In [76]:
fig, axs = plt.subplots(nrows=all_metrics_df.shape[1], ncols=1, figsize=(15, 30))

for idx, metric_title in enumerate(all_metrics_df.columns):
    axs[idx].plot(all_metrics_df[metric_title])
    axs[idx].set_title('{} distribution through iterations'.format(metric_title))

Тестувальні вибірки

In [77]:
fig, axs = plt.subplots(nrows=all_metrics_df.shape[1], ncols=1, figsize=(15, 30))

for idx, metric_title in enumerate(all_metrics_df.columns):
    axs[idx].plot(all_metrics_df_test[metric_title])
    axs[idx].set_title('{} distribution through iterations'.format(metric_title))
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: